#encoding=utf-8
'''
Created on 2011-11-17

@author: Sunny

python version:2.6
'''
from corpus import *
from corpus.pair_writer import write_pairs, write_pairs_1toN
from ltp.interface import get_tagged_sentence

def tag_pairs(rte_pairs):
    '''Tag the text and hypothesis of every pair, in place.

    Each pair's ``text`` and ``hyp`` attributes are replaced by the result of
    ``get_tagged_sentence`` applied to the previous value.  According to the
    original author's note this performs word segmentation, part-of-speech
    tagging and named-entity tagging.

    rte_pairs -- iterable of objects exposing writable ``text`` and ``hyp``
                 attributes.

    Returns the very same ``rte_pairs`` object, so the call can be chained.
    '''
    # Iterate over the pairs directly rather than indexing through
    # range(len(...)); the pair objects themselves are mutated.
    for pair in rte_pairs:
        pair.text = get_tagged_sentence(pair.text)
        pair.hyp = get_tagged_sentence(pair.hyp)
    return rte_pairs

def transfer(reader, src_file, writer, dst_file, verbose='full', with_tag=True):
    '''Read pairs from src_file, tag them, and write them to dst_file.

    reader   -- object whose ``pairs(src_file)`` method yields the pairs.
    src_file -- passed unchanged to ``reader.pairs``.
    writer   -- callable invoked as
                ``writer(pairs, dst_file, verbose, with_tag)``.
    dst_file -- passed unchanged to ``writer``.
    verbose, with_tag -- forwarded positionally to ``writer``; their meaning
                is defined by the writer, not here.

    Nothing is returned.
    '''
    tagged = tag_pairs(reader.pairs(src_file))
    writer(tagged, dst_file, verbose, with_tag)
        
    
if __name__ == '__main__':

    # Step 1: tag the plain train pairs.
    transfer(reader=rte_pairs_from_train_pairs,
             src_file=train_pairs_file_name,
             writer=write_pairs,
             dst_file=tagged_train_pairs_file_name)

    # Step 2: tag the train 1-to-N pairs.
    # Building the train 1-to-N file from the hypothesis file is disabled
    # below, so train_1ton_file_name presumably already exists on disk
    # (NOTE(review): confirm before running on a fresh checkout).
#    rps = rte_train_pairs_from_hypothesis.pairs(train_hypothesis_file_name)
#    write_pairs_1toN(rps, train_1ton_file_name)
    transfer(reader=rte_pairs_from_1ton_pairs,
             src_file=train_1ton_file_name,
             writer=write_pairs_1toN,
             dst_file=tagged_train_1ton_file_name)

    # Step 3: build the test 1-to-N pairs from the test hypothesis file,
    # write them out, then tag the file that was just written.
    rps = rte_test_pairs_from_hypothesis.pairs(test_hypothesis_file_name)
    write_pairs_1toN(rps, test_1ton_file_name)

    transfer(reader=rte_pairs_from_1ton_pairs,
             src_file=test_1ton_file_name,
             writer=write_pairs_1toN,
             dst_file=tagged_test_1ton_file_name)